In [ ]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings(action='ignore')

feature = pd.read_csv('Feature_Dataset.csv')
shuffled = sklearn.utils.shuffle(feature)
In [ ]:
from sklearn.model_selection import train_test_split

# train : test = 0.8 : 0.2
train, test = train_test_split(shuffled, test_size=0.2)
In [ ]:
y_train = train.iloc[:,:7]
x_train = train.iloc[:,7:]

y_test = test.iloc[:,:7]
x_test = test.iloc[:,7:]
In [ ]:
# min-MAX norm.

x_train = (x_train - x_train.mean())/x_train.std()
x_test = (x_test - x_test.mean())/x_test.std()
In [ ]:
yy_train = np.argmax(y_train.to_numpy(), axis=1)
yy_test = np.argmax(y_test.to_numpy(), axis=1)

In [ ]:
#RandomForest test

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=5)
rf.fit(x_train,yy_train)
Out[ ]:
RandomForestClassifier(max_depth=5)
In [ ]:
from sklearn.metrics import accuracy_score

prd = rf.predict(x_test)

accuracy = accuracy_score(yy_test, prd)

print('Acc: ', accuracy)
Acc:  0.75
In [ ]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

classes = ['Conga&Bongo', 'Crash&OpenHat', 'Hi-Hat', 'Kick', 'Shaker', 'Snare', 'Tom']

cm = pd.DataFrame(confusion_matrix(yy_test, prd), columns=classes, index=classes)
sns.heatmap(cm, annot=True)
Out[ ]:
<AxesSubplot:>
In [ ]:
# define feature names (from 9th cell of extract.ipynb)
feature_name = []
feature_name.append('duration')
for ch in ['ZCR', 'Flt', 'Bdw', 'Roll']:
    for i in range(14):
        name = str(ch) + '[' + str(i) + ']'
        feature_name.append(name)
In [ ]:
# Extract single tree
estimator = rf.estimators_[5]

from sklearn.tree import export_graphviz
# Export as dot file
export_graphviz(estimator, out_file='tree.dot', 
                feature_names = feature_name,
                class_names = classes,
                rounded = True, proportion = False, 
                precision = 2, filled = True)

# Convert to png using system command (requires Graphviz)
from subprocess import call
call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])

# Display in jupyter notebook
from IPython.display import Image
Image(filename = 'tree.png')
Out[ ]:

  • Classifier Comparison
In [ ]:
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", C=0.025, probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
    MLPClassifier(alpha=1, max_iter=1000)
    ]

# Logging for Visual Comparison
log_cols=["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)

for clf in classifiers:
    clf.fit(x_train, yy_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(x_test)
    acc = accuracy_score(yy_test, train_predictions)
    print("Accuracy: {:.4%}".format(acc))
    
    train_predictions = clf.predict_proba(x_test)
    ll = log_loss(yy_test, train_predictions)
    print("Log Loss: {}".format(ll))
    
    log_entry = pd.DataFrame([[name, acc*100, ll]], columns=log_cols)
    log = log.append(log_entry)
    
print("="*30)
==============================
KNeighborsClassifier
****Results****
Accuracy: 66.9118%
Log Loss: 6.128277598514636
==============================
SVC
****Results****
Accuracy: 38.9706%
Log Loss: 0.9916739454351426
==============================
NuSVC
****Results****
Accuracy: 72.0588%
Log Loss: 0.7207909185016207
==============================
DecisionTreeClassifier
****Results****
Accuracy: 72.0588%
Log Loss: 9.650540463283873
==============================
RandomForestClassifier
****Results****
Accuracy: 76.4706%
Log Loss: 1.0770614451183227
==============================
AdaBoostClassifier
****Results****
Accuracy: 32.3529%
Log Loss: 3.860725515386247
==============================
GradientBoostingClassifier
****Results****
Accuracy: 77.9412%
Log Loss: 0.7663721369166487
==============================
GaussianNB
****Results****
Accuracy: 58.8235%
Log Loss: 9.179024213052204
==============================
LinearDiscriminantAnalysis
****Results****
Accuracy: 74.2647%
Log Loss: 1.5838210200380267
==============================
QuadraticDiscriminantAnalysis
****Results****
Accuracy: 38.9706%
Log Loss: 21.081219404720372
==============================
MLPClassifier
****Results****
Accuracy: 76.4706%
Log Loss: 0.6914668319333664
==============================
In [ ]:
sns.set_color_codes("muted")
sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")

plt.xlabel('Accuracy %')
plt.title('Classifier Accuracy')
plt.show()

sns.set_color_codes("muted")
sns.barplot(x='Log Loss', y='Classifier', data=log, color="g")

plt.xlabel('Log Loss')
plt.title('Classifier Log Loss')
plt.show()
In [ ]: